//===============================================
// Example page: http://beijing.jinpu.com/shangpu/zu/p1
//===============================================
var spider = require('../spider.js');

/**
 * Per-page callback passed to spider.spiderAll.
 *
 * Saves the raw page, then the matched <ul> section, then the normalized
 * <li> blocks and finally the extracted records, each through
 * spider.db.fSave with its own tag and file suffix.
 *
 * Stops quietly after the "ul" save when the page holds no matching list,
 * instead of throwing on a missing match.
 *
 * @param {string} data  page content handed over by the spider
 * @param {*} nPage      page identifier, forwarded unchanged to fSave
 */
function fnSpiderData(data, nPage)
{
	//-0. src-
	spider.db.fSave(data, "src", nPage, "txt");

	//-1. get src.ul part-
	var ulMatches = dodata_ul(data);
	spider.db.fSave(ulMatches, "ul", nPage, "txt");

	// The match result can be falsy or empty (dodata_li guards for the same
	// thing); without this check ulMatches[0] throws and aborts the crawl.
	if(!ulMatches || ulMatches.length === 0)
		return;

	//-2. get src.ul.li part-
	var liResult = dodata_li(ulMatches[0]);
	var liText = liResult[0];
	var records = liResult[1];

	// liText is a joined string only when at least one li matched;
	// otherwise it is whatever the matcher returned (null / empty array).
	if(typeof liText !== "string" || liText === "")
		return;

	//-2.1 src.ul.li-
	spider.db.fSave(liText, "li", nPage, "txt");

	if(records && records.length > 0)
	{
		//-2.2 src.ul.li.rtn-
		//-results go to a file with a different suffix so they are easy to find-
		//-on Windows, `copy *.dat rtn.dat` merges the per-page files-
		//-Excel's text-to-columns can then split the fields-
		spider.db.fSave(records.join('\r\n')+"\r\n", "rtn", nPage, "dat");
	}
}

//-extract the <ul class="list_cent"> section-
// Greedy body: the match runs from the opening tag to the LAST </ul> in the input.
var rePattern_ul = new RegExp('<ul\\s?class="list_cent">[\\s\\S]*</ul>', "gm");

/**
 * Runs the list pattern over the page source.
 *
 * @param {string} data  page source
 * @returns whatever spider.reMatch yields for rePattern_ul
 */
function dodata_ul(data)
{
	return spider.reMatch(data, rePattern_ul);
}

//-extract each <li> block and its parsed record-
var strPattern_li = '<li\\s+class="list_cent_li"\\s+\\S*>[\\s\\S]*?</li>';
var rePattern_li_trim = /\s{2,}/gm;		// collapse whitespace runs so an li fits on one line
var rePattern_li_crlf = /\r\n|\r|\n/gm;		// strip every line break (CRLF, LF or CR), not just CRLF
var rePattern_li_space = />\s</gm;		// drop whitespace between adjacent tags
var	rePattern_li = new RegExp(strPattern_li, "gm");

/**
 * Splits the list markup into <li class="list_cent_li"> blocks, flattens each
 * block onto a single line and parses it into a record via dodata_li_ex.
 *
 * Line breaks of any style are removed first: the field patterns used by
 * dodata_li_ex rely on `.`, which does not cross a newline, and the joined
 * output uses CRLF as the record separator.
 *
 * @param {string} data  markup containing the li blocks
 * @returns {Array} two-element array [liText, records]:
 *   liText  - the normalized li blocks joined with "\r\n"; when nothing
 *             matched, the unmodified result of spider.reMatch
 *   records - one "||||"-joined string per li for which dodata_li_ex
 *             returned a truthy value
 */
function dodata_li(data)
{
	var	rtn = spider.reMatch(data,rePattern_li);
	var objs = [];
	if(rtn && rtn.length>0)
	{
		var obj = null;
		for(var i=0; i<rtn.length; i++)
		{
			// order matters: remove line breaks, then squeeze whitespace runs,
			// then close the single-space gaps left between tags
			rtn[i] = spider.reReplace(rtn[i], rePattern_li_crlf, "");
			rtn[i] = spider.reReplace(rtn[i], rePattern_li_trim, " ");
			rtn[i] = spider.reReplace(rtn[i], rePattern_li_space, "><");

			obj = dodata_li_ex(rtn[i]);
			if(obj)
				objs.push(obj.join("||||"));
		}

		rtn = rtn.join("\r\n");
	}
	return [rtn, objs];
}

//-extract the record fields from one normalized li-
var rePattern_li_titleurl = new RegExp('<a\\starget="_blank"\\shref="http://beijing\\.jinpu\\.com/shangpu/zu/(\\d+)">(.*)</a>', "gm");
var rePattern_li_address = new RegExp('<p\\sclass="detail">(.+?)</p><p\\sclass=', "gm");
var rePattern_li_demo = new RegExp('</p><p\\sclass="detail">(.+)</p><p', "gm");
var rePattern_li_month = new RegExp('<p><span\\sclass="orange">(\\S+)</span>\\S+/\\S+\\s<p>', "gm");
var rePattern_li_day = new RegExp('<p>([\\d\\.]+)\\S+/\\S+</p>', "gm");

// First entry of a match list, or `fallback` when the list is missing,
// empty, or its first entry is null/undefined.
function dodata_li_first(matches, fallback)
{
	if(matches && matches.length>0 && matches[0] !== null && matches[0] !== undefined)
		return matches[0];
	return fallback;
}

/**
 * Pulls the record fields out of a single-line li block.
 *
 * Fields, in order:
 *   0 - digits captured from the listing URL
 *   1 - link text of that listing anchor
 *   2 - content of the first <p class="detail">
 *   3 - content of the <p class="detail"> that follows a closing </p>
 *   4 - text of the <span class="orange"> (presumably the monthly price - confirm)
 *   5 - leading number of a "<p>number.../...</p>" paragraph (presumably the daily price - confirm)
 *
 * @param {string} data  one li block, already flattened to a single line
 * @returns {Array|null} the six fields, with "" for any of fields 2-5 that
 *   did not match; null when the listing link itself is not found, so the
 *   caller's truthiness check skips the block instead of the run crashing
 */
function dodata_li_ex(data)
{
	// first match carries both requested groups: [id, title]
	var titleurl = dodata_li_first(spider.reGetSpecMatch(data,rePattern_li_titleurl,1,2), null);
	if(!titleurl)
		return null;

	var address = dodata_li_first(spider.reGetSpecMatch(data,rePattern_li_address,1), "");
	var demo = dodata_li_first(spider.reGetSpecMatch(data,rePattern_li_demo, 1), "");
	var month = dodata_li_first(spider.reGetSpecMatch(data, rePattern_li_month, 1), "");
	var day = dodata_li_first(spider.reGetSpecMatch(data, rePattern_li_day, 1), "");

	return [titleurl[0], titleurl[1], address, demo, month, day];
}


//-crawl the listing pages-
// Request settings handed to spider.spiderAll; `path` doubles as the
// prefix from which the per-page URL paths are built.
var options = {
	host: 'beijing.jinpu.com',
	port: 80,
	path: '/shangpu/zu/p',
	method: 'GET'
};

// One URL path per listing page: <path>1 through <path>52.
var urls = [];
var pageNo = 1;
while(pageNo <= 52)
{
	urls.push(options.path + pageNo);
	pageNo += 1;
}

// Destination settings, passed through to spiderAll as-is.
var destobj = { path: './prjtest', scale: 1 };

// fnSpiderData is the callback given to the spider for the fetched pages.
spider.spiderAll(options, urls, destobj, fnSpiderData);
